######################### Descriptive Statistics ########################

################################################# set 'working Directory'
setwd("D:/AS17 translation/AS17 supplements")
opar <- par() 
#########################################################################
#                            Data / Sources
# resistivity.csv
#  reliabilty.csv
#########################################################################

                                                          # Section 3.1.1

                                                # blood group A, B, AB, 0
bloodgroup <- c(rep("A", 69), rep("B", 17), rep("AB", 7), rep("0", 62))
vt <- table(bloodgroup); vt  
mode <- (names(vt[vt == max(vt)])); mode
vect <- sample(0:9, size=25, replace=T); vect
vt <- table(vect); mode <- as.numeric(names(vt[vt == max(vt)])); mode 

#########################################################################

absolutely <- c(69, 17, 7, 62)                  # blood group A, B, AB, 
names(absolutely) <- c("A","B","AB","0"); absolutely  
number      <- sum(absolutely); number
relatively  <- absolutely / number; round(relatively, 2)   
percent     <- relatively * 100; round(percent, 1) 

######################################################################### 
                                                     
Gini     <- sum(relatively*(1-relatively)); Gini     # Gini-Simpson index   
  
#########################################################################

                                                          # Section 3.1.4

absolutely  <- c(69, 17, 7, 62)                 # blood group A, B, AB, 0
names(absolutely) <- c("A","B","AB","0"); absolutely 

                                          # bar and pie chart; figure 3.1
library(gplots)  						  
par(mfrow=c(1,3), lwd=1.5, font.axis=2, bty="n", ps=15, cex.axis=1) 
barplot2(absolutely, names.arg=c("A","B","AB","0"), las=1, 
		cex.axis = 1.3, cex.names = 1.3, ylim=c(0,70),
        density=c(10,15,18,20), angle=c(45,135,45,135), col="black")
barplot2(as.matrix(absolutely), names.arg="Blood group", beside = FALSE, 
		ylim=c(0,160), yaxp=c(0,160,8), xlim=c(0,1.5), las=1,
		cex.axis = 1.3, cex.names = 1.1,
		density=c(10,15,18,20),angle=c(45,135,45,135), col="black")	
text(1.4,30,"A",bg="white",cex=1.8); text(1.4,78,"B",bg="white",cex=1.8)	
text(1.4,90,"AB",bg="white",cex=1.8); text(1.4,120,"0",bg="white",cex=1.8)	
pie(absolutely, labels=c("A","B","AB","0"), radius = 1.0,
    density=c(10,15,18,20),angle=c(45,135,45,135), col="black", cex=1.7)  

#########################################################################

                                                          # Section 3.1.5

absolutely <- matrix(c(30, 10, 5, 40, 39, 7, 2, 22), nrow=2, byrow=T)
colnames(absolutely)        <- c("A","B", "AB","0")
rownames(absolutely)        <- c("male", "female")
names(dimnames(absolutely)) <- c("Gender","Blood group"); absolutely
margin.table(absolutely, 1)
margin.table(absolutely, 2)
round(prop.table(absolutely, 1), 3)
round(prop.table(absolutely, 2), 3)

#########################################################################

                           # rectangle diagram and mosaicplot; figure 3.2
par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="n", ps=14, cex.axis=1) 
barplot(absolutely, density=c(10, 20), col="black", angle=c(45,135),
         las=1, legend.text=T, ylim=c(0,85), xlim=c(0,8))        
mosaicplot(absolutely,  col=c("grey80","grey60","grey40","grey20"), 
           cex=1.0, main=" ")  

#########################################################################

                                                          # Section 3.1.6 

                                       # conditional relative frequencies

                                                      # Simpson's-Paradox
data <- array(c(2,53,1,61, 3,121,5,152, 14,95,7,114, 27,103,12,66,
                51,64,40,81, 29,7,101,28, 13,0,64,0), dim=c(2,2,7),
        dimnames=list(c("deceased","living"),
                      c("Smoker","Non-Smoker"),
        c("18-24","25-35","35-44","45-54","55-64","65-74",">74")))
tsums <- apply(data,c(1,2), sum)

par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="n", ps=12, cex.axis=1) 
mosaicplot(tsums, main=" ")

rbind(tsums, round(tsums[1,]/apply(tsums,2,sum)*100, 1))
for (i in 1:7) { t <- data[,,i]; apply(t,2,sum)
print(cbind(t,round(t[1,]/apply(t,2,sum)*100,1))) }

#########################################################################
                                                      
tau_GK <- function(dat) {                               # Goodman-Kruskal
	N <- sum(dat);
	dat.rows <- nrow(dat); 	dat.cols <- ncol(dat)
	max.col <- sum.col <- L.col <- matrix(,dat.cols)
	max.row <- sum.row <- L.row <- matrix(,dat.rows)
	for(i in 1:dat.cols) sum.col[i] <- sum(dat[,i]); max.col[i] <- max(dat[,i])
	for(i in 1:dat.rows) sum.row[i] <- sum(dat[i,]); max.row[i] <- max(dat[i,])
	max.row.margin <- max(apply(dat,1,sum));
	max.col.margin <- max(apply(dat,2,sum));											
	p.a <- N^2						                 # tau Column|Row
	for(i in 1:dat.rows) p.a <- p.a - N * sum(dat[i,]^2/sum.row[i])
	p.b <- N^2 - sum(sum.col^2);
	tau.CR <- 1 - (p.a / p.b)
	p.a <- N^2						                 # tau Row|Column
	for(j in 1:dat.cols) p.a <- p.a - N * sum(dat[,j]^2/sum.col[j])
	p.b <- N^2-sum(sum.row^2)
	tau.RC <- 1 - (p.a / p.b)
	cat("\ntau_Col:Row = ",round(tau.CR, 3),
	" and tau_Row:Col = ",round(tau.RC, 3),"\n")
}

x <- matrix(c(10,30,5,0,20,30,5,0,0), byrow=T, nrow=3); x
tau_GK(x)

#########################################################################

                                                            # Section 3.2

                                                           # order values
before  <- c(3,  4,  6,  4,  8,  9,  2,  7, 10,  7,  5,  6,  5 )
after   <- c(4,  4,  1,  5,  3,  3,  1,  3,  4,  5,  6,  9,  1 )

before; sort(before)
before; rank(before)

#########################################################################

                                                          # Section 3.2.1

before  <- c(3,  4,  6,  4,  8,  9,  2,  7, 10,  7,  5,  6,  5 )          
vsort <- sort(before); n  <- length(vsort)                # Quantiles                   
Q1       <- vsort[floor((n+1)*0.25)]; Q1
Q2       <- vsort[floor((n+1)*0.50)]; Q2
Q3       <- vsort[floor((n+1)*0.75)]; Q3

median(before); 
quantile(before, c(0.25, 0.50, 0.75))

#########################################################################

                                                          # Section 3.2.3

MA <- mean(abs(before - median(before))); MA         # MAD
 
D  <- mad(before, const=1); D                        # median deviation

#########################################################################

                                                          # Section 3.2.4

                                      # dot plot and box plot; figure 3.4
before  <- c(3,  4,  6,  4,  8,  9,  2,  7, 10,  7,  5,  6,  5 )
after   <- c(4,  4,  1,  5,  3,  3,  1,  3,  4,  5,  6,  9,  1 )

par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="n", ps=14, cex.axis=1) 
stripchart(list(before, after), method="jitter", jitter=0.1, las=1,  
           vertical=TRUE, group.names=c("before","after"),
           xlim=c(0.5,2.5), ylim=c(0,10), pch=16, cex=1.3)           
boxplot(before, after, range = 1.5, names=c("before","after"), 
        las=1, ylim=c(0,10), col=8)

##########################################################################

                                                           # Section 3.2.5
                                        
kendall.tau <- function(x, y) {                    # Kendall's correlation
  n <- length(x)
  if(n != length(y)) stop("x und y different length")
  x <- rank(x);    y <- rank(y)                    # rank numbers 
  m <- cbind(x,y); m <- m[order(m[,1]),]           # order by x
  x <- m[,1];      y <- m[,2]
  inv <- 0; prov <- 0
  for (i in 1:n) {
    for (j in i:n) {
      if (x[i]<x[j] & y[i]>y[j]) inv  <- inv + 1   # Inversions
      if (x[i]<x[j] & y[i]<y[j]) prov <- prov + 1  # Proversions 
    }  }
  rx <- table(x); Tx <- 0.5*sum(rx*(rx-1))         # Ties in x
  ry <- table(y); Ty <- 0.5*sum(ry*(ry-1))         # Ties in y
  r.tau_b = (prov-inv)/sqrt((prov + inv + Tx)*(prov + inv + Ty)) 
  cat("Number of proversions:",prov,"and inversions:",inv,"\n",
      "Kendalls Tau_b:",r.tau_b,"\n")
}

#########################################################################

x <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)              # types of wine
y <- c(2, 1, 5, 3, 4, 6, 7, 9, 8, 10)              # without ties
kendall.tau(x , y)                            

#########################################################################

x <- c( 8,  5, 10, 10,  8); x; rank(x)             # order numbers
y <- c(15, 12,  6, 16, 12); y; rank(y)             # with ties
kendall.tau(x , y)   
cor(x, y , method = "kendall")                     # spec. funtion cor()     

#########################################################################

                                                          # Section 3.2.6

                                              # partial rank correlation
z <- c(1,2,3,4,5,6)                           # simple example   
x <- c(3,1,4,2,6,5)
y <- c(4,2,1,6,3,5)

tauXZ <- round(cor(x, z, method="kendall"), 4)
tauYZ <- round(cor(y, z, method="kendall"), 4)
tauXY <- round(cor(x, y, method="kendall"), 4)
tauXY.Z <- (tauXY - tauXZ*tauYZ) / sqrt((1-tauXZ^2)*(1-tauYZ^2))
cbind(tauXZ, tauYZ, tauXY, tauXY.Z)

########################################################################

                                              # partial rank correlation
I <- c(1,2,3,4,5,6,7,8,9,10)                  # Intelligence
A <- c(1,4,5,6,2,7,3,9,8,10)                  # mathematics
B <- c(4,1,3,5,2,6,7,10,9,8)                  # musical

tauAI <- round(cor(A, I, method="kendall"), 4)
tauBI <- round(cor(B, I, method="kendall"), 4)
tauAB <- round(cor(A, B, method="kendall"), 4)
tauAB.I <- (tauAB - tauAI*tauBI) / sqrt((1-tauAI^2)*(1-tauBI^2))
round(cbind(tauAI, tauBI, tauAB, tauAB.I),3)

#########################################################################

                                                            # Section 3.3

                                                        # Body Mass Index     
bmi <- c(28.2,23.9,20.3,26.7,25.6,32.5,23.5,19.7,27.8,26.7,20.7,28.4,33.3)
n     <- length(bmi)
Summe <- sum(bmi); Summe
Summe/n                                       # arithmetic mean 
mean(bmi)                                     # R-function mean()       

#########################################################################

bmi <- c(22.2,23.9,20.3,26.7,25.6,22.5,23.5,24.7,27.8,26.7,20.7,26.4,40.3)
sort(bmi)
mean(bmi)
mean(bmi, trim=0.1)                           # trimming mean

#########################################################################

winsor <- function(x, tr=.1) {                # winorizing mean              
    if(tr < 0 || tr > 0.5) stop("Anteil?")
    y <- sort(x)
    n <- length(x)
    ibot <- floor(tr*n)+1; itop <- length(x)-ibot+1
    xbot <- y[ibot];       xtop <- y[itop]
    y <- ifelse(y<=xbot, xbot, y)
    y <- ifelse(y>=xtop, xtop, y)
    mean(y)
}
bmi <- c(22.2,23.9,20.3,26.7,25.6,22.5,23.5,24.7,27.8,26.7,20.7,26.4,40.3)
mean(bmi)
winsor(bmi)

##########################################################################

bmi <- c(28.2,23.9,20.3,26.7,25.6,32.5,23.5,19.7,27.8,26.7,20.7,28.4,33.3)
n   <-length(bmi); m <- mean(bmi)
saq   <- (bmi - m)^2                
sqrt(sum(saq)/(n-1))                          # standard deviation
sd(bmi)                                       # R-funtion sd()     

#########################################################################

                                      # combination of mean and variances
                      
s1 <- c(40,50,72); s2 <- c(30,60,80,90,100);  s3 <- c(40,50,60,70)
n <- c(3, 5, 4)
m <- c(mean(s1), mean(s2), mean(s3))
s <- c(sd(s1), sd(s2), sd(s3)); v <- s^2

#########################################################################
                                            
mv.combi <- function(n, m, v) { # function according to nach Yiu-Man Chan
    r   <- length(n)
    n.t <- sum(n);                  m.t <- sum(n*m) / n.t
    ts1 <- sum((n-1)*v);    ts2 <- 0
    for (k in 1:(r-1)) for (l in (k+1):r) 
        ts2 <- ts2 + n[k]*n[l]*(m[k]-m[l])^2 
    v.t <- (ts1 + (1/n.t)*ts2) / (n.t-1)
    cat("\n","Sum =",n.t," - Mean =",m.t," - Variance =",v.t)
}
mv.combi(n=c(3, 5, 4), m=c(54, 72, 55), v=c(267.98, 770.06, 166.67))

#########################################################################

library(Hmisc)                                # Errorbar-Plot; Figure 3.6
x <- 1:5 
# y <- c(5, 6.5, 9, 4, 6) + rnorm(5)
# delta <- c(0.4, 0.5, 0.4, 0.3, 0.5) + rnorm(5)

y <- c(4.47, 6.43, 7.52, 3.72, 6.73)
delta <- c(1.71, 0.85,  1.43, 0.6,  1.34)

par(mfrow=c(1,1), lwd=2.0, font.axis=2, bty="l", ps=16, cex.axis=1)
errbar( x, y, y + delta, y - delta , ylim=c(2, 10), xlab=" ", pch=15,
        las=1, lwd=2.0, cex=2.0, ylab=expression(bar(x) %+-% s))
abline(h=seq(2,10,1), lty=2, col="grey")

#########################################################################

                                                          # Section 3.3.7

gew.stats <- function(meanis, ni, varis=NULL) {
  k <- length(meanis);     n <- sum(ni)
  mean <- sum(ni*meanis)/n
  if (is.null(varis)) {               # weighted mean - equal variances 
    mgew <- sum(ni*meanis)/n
    cat("\n","Weighted mean:", round(mgew, 2),"\n")
  }
  if (!is.null(varis)) {              # weighted meand and variance
    mgew <- sum(ni*meanis/varis)/sum(ni/varis)
    sin  <- sqrt(sum(varis*(ni-1)) / (n-k))
    vgew <- (sum((ni-1)*varis) + sum(ni*(meanis - mean)^2)) / (n-1)
    cat("\n","Weighted mean            :", round(mgew, 2),"\n",
             "Standard deviation within:", round(sin, 3),"\n",
             "Weighted variance        :", round(vgew, 3),"\n")
  }
}
n <- c(8, 10, 6);  m <- c(9,  7, 8);  s <- c(2,  1, 2) 
gew.stats(m, n)                       # equal variances
gew.stats(m, n, s^2)                  # unequal variances

#########################################################################
        
grade   <- c(  2,   3,   3,   2,   1)
weight  <- c(0.3, 0.3, 0.2, 0.1, 0.1)
weighted.mean(grade, weight)          # R-function weighted.mean()

#########################################################################

                                                          # Section 3.3.8

                                            # growth rate (%) 
rt <- 1:10                                          
n_rule  <- round(70/rt, 2)                  # rule of 70)
n_exact <- round(log(2)/log(1+(rt/100)), 2)
tab <- cbind(rt, n_rule, n_exact); tab

#########################################################################

salary    <- c(1.025, 1.10, 1.22)           # salary increases         
lg.salary <- log10(salary)
10^mean(lg.salary)                          # average increase 

#########################################################################

                                                          # Section 3.3.9

                                                # harmonic mean
pieces      <- c(10, 5, 8)                      # cost / number of pieces    
rez.pieces <- 1/pieces; n <- length(pieces)
n / sum(rez.pieces)                             # harmonic mean    

#########################################################################

                                                          # Section 3.4.2

                              # standard error of multiple determinations
mat <- matrix(c(11, 13, 12, 27, 25, 29, 43, 47, 42, 63, 57, 60), 
              nrow=4, byrow=T); mat
Bmean <- apply(mat, 1, mean); Bmean
Bsaq  <- apply(mat, 1, function(x) sum((x - mean(x))^2)); Bsaq
sMB <- sqrt(sum(Bsaq) / 8); sMB
Pmean <- apply(mat, 2, mean); Pmean
Psaq <- apply(mat, 2, function(x) sum((x - mean(x))^2)); Psaq
Pv   <- Psaq/3; Pv
quotient <- sMB / sqrt(sum(Pv)/3); quotient

#########################################################################

                                                          # Section 3.4.4

                                               # precsion of measurements
                                               # electrical resistivity
resist <- read.csv("resistivity.csv", 
                   header = TRUE, sep = ";", dec=",", fill = FALSE)
attach(resist); resist[1:3,]

N <- 6                            # repetitions 
L <- 2                            # series         
K <- 6                            # days           
                                  # repeatability in the series
s1 <- sqrt(sum(resist$stdev^2)/(K*L)); round(s1, 4)
                                  # reproducibility from day to day         
s2.1 <- sd(resist$xmean[serie==1]); round(s2.1, 4)
                                                        
s2.2 <- sd(resist$xmean[serie==2]); round(s2.2, 4)
                                                         
s2 <- sqrt((s2.1^2 + s2.2^2)/L); round(s2, 4)
                                  # stability from series to series            
m1 <- mean(resist$xmean[serie==1])
m2 <- mean(resist$xmean[serie==2])
s3 <- sd(c(m1, m2)); round(s3, 4)
                                  # uncertainty for a single result      
s.R <- sqrt(s3^2 + ((K-1)/K)*s2^2 + ((N-1)/N)*s1^2); round(s.R, 4)


#########################################################################

                                                            # Section 3.5

                                                       # histogramm (BMI)
bmi <- c(20.8, 29.7, 27.6, 28.6, 20.7, 21.0, 23.1, 21.9, 24.8, 25.3, 27.1, 23.3,
         19.5, 25.2, 25.8, 21.6, 28.7, 30.6, 23.3, 26.6, 35.3, 17.0, 22.6, 25.9,
         29.0, 23.7, 21.7, 26.5, 18.5, 24.5, 29.0, 23.2, 27.9, 18.8, 27.1, 21.5,
         26.5, 20.3, 25.5, 32.0, 26.7, 34.9, 24.6, 25.6, 26.7, 22.1, 28.8, 28.1,
         28.8, 32.2, 30.3, 24.9, 28.0, 21.1, 22.0, 25.5, 24.0, 26.6, 24.7, 28.8)

                                                             # figure 3.9   
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=11, cex.axis=1) 
hist(bmi, breaks=c(16, 18,20,22,24,26,28,30,32,34,36), col="grey", las=1,
     cex.axis=1.2, cex.lab=1.2,
     xlim=c(15,40), xlab="Body Mass Index", ylab="Frequency", main=" ")
abline(h=seq(0,12,2,), lty=2, col="grey")

#########################################################################

                                                              # table 3.8              
h <- hist(bmi, breaks=c(16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36), 
          plot=FALSE)
h$breaks
h$mids
h$counts
cumsum(h$counts)
round(h$counts/length(bmi),3)*100
round(cumsum(h$counts)/length(bmi),3)*100
                                                            # figure 3.10
h <- hist(bmi, breaks=c(16, 18,20,22,24,26,28,30,32,34,36), plot=FALSE)
par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="n", ps=14, cex.axis=1) 
x <- h$breaks;  y <- c(0, round(cumsum(h$counts)/length(bmi),3))
plot(x, y, type="b", ylim=c(0,1), xlim=c(15,40), las=1, lwd=2,
           xlab="Body Mass Index", ylab="relative cum. frequency" )
plot.ecdf(bmi, xlab="Body Mass Index", col.vert = "gray20", las=1, xlim=c(15,40), main=" ")
abline(h=0.8, lty=2, col="grey")
abline(v=28.5, lty=2, col="grey") 

#########################################################################

                                                            # figure 3.11
par(lwd=1.5, font.axis=2, ps=14, cex.axis=1, bty="o") 
op <- par(mar=c(0,0,0,0), oma=c(0,0,0,0)+.1)
layout(matrix(c(1,1,1,2), nc=1))
plot(sort(bmi), ppoints(length(bmi)), type="l", lwd=2, 
            xlab="", ylab="", main="", xaxt="n")
axis(1, labels = TRUE, tick = TRUE)
text(19,0.28,"0.25 (1. quartile)", cex=1.5)
text(19,0.53,"0.50 (median value)", cex=1.5)
text(19,0.78,"0.75 (3. quartile)", cex=1.5)
abline(h = c(0,.25,.5,.75,1), lty=2)
abline(v = quantile(bmi), col = "blue", lwd = 2, lty=2)
points(quantile(bmi), c(0,.25,.5,.75,1), lwd= 8, col="blue")
boxplot(bmi, horizontal = TRUE, col = "grey", 
            lwd=2, cex=1.5, pch=16, axes = FALSE)

#########################################################################

                                          # quantile from classified data
quantil.hist <- function(p, breaks, freq) { 
  n <- sum(freq); rel <- freq/n
  if(any(p<=0) | any(p>=1)) return("Fehler: 0<p<1")
  cum <- cumsum(c(0, rel))
  loc <- apply(outer(p, cum, ">="), 1, sum)
  return(breaks[loc] + (p - cum[loc])/rel[loc] *
           (breaks[loc+1] - breaks[loc]))   }

#########################################################################

ecdf.hist <- function(x, breaks, freq) {  # proportions from classified data
  n <- sum(freq); rel <- freq/n
  if(x < breaks[1]) return("Fehler: x < min")
  else if(x > breaks[length(breaks)])
    return("Fehler: x > max")
  else {loc <- max(which(breaks <= x))
  return(sum(rel[1:(loc-1)])+(x-breaks[loc])/
           (breaks[loc+1] - breaks[loc]) * rel[loc])}   } 

#########################################################################

                                                            # figure 3.12
breaks <- c(16, 18, 20, 22, 24, 26, 28, 30, 32, 34)
mids   <- c(17, 19, 21, 23, 25, 27, 29, 31, 33, 35)
counts <- c( 1,  3, 10,  8, 12, 11,  9,  3,  1,  2)
library(HistogramTools)
barplot(counts, names.arg=mids, las=1, xlab="Body Mass Index")
quantil.hist(c(0.25, 0.50, 0.75), breaks, counts)      # approximated
quantile(bmi, probs=c(0.25, 0.50, 0.75))               # exact
ecdf.hist(x=30, breaks, counts)

#########################################################################

                                                          # Section 3.5.3

                                            # function for Pareto diagram 
pareto <- function (x, main = "", ylab = "Value") {
  op <- par(mar = c(5, 4, 4, 5) + 0.1, las = 2)
  x <- rev(sort(x))
  plot( x, type = 'h', axes = F, lwd = 16,
                  xlab = "", ylab = ylab, main = main )
  axis(2)
  points( x, type = 'h', lwd = 12, col = heat.colors(length(x)) )
  y <- cumsum(x)/sum(x)
  par(new = T)
  plot(y, type = "b", lwd = 3, pch = 7, axes = FALSE, 
       xlab='', ylab='', ylim=c(0,1), main='')
  points(y, type = 'h')
  axis(4)
  par(las=0)
  mtext("kumulierter Anteil", side=4, line=3)
  print(names(x))
  axis(1, at=1:length(x), labels=names(x))
  par(op)
}
       
defect <- c(12, 2, 32, 4, 19, 9, 1)                                  
names(defect) <- c("A", "B", "C", "D", "E", "F", "G")
                                                            # figure 3.13       
par(mfrow=c(1,1), lwd=2.5, font.axis=2, bty="u", 
            ps=10, cex.axis=0.8, cex=1.3) 
pareto(defect, ylab="Fehler-H?ufigkeit")

#########################################################################

library(qcc)                             # pareto.chart() in library(qcc)

defect <- c(12, 2, 32, 4, 19, 9, 1)                         # figure 3.13                          
names(defect) <- c("A", "B", "C", "D", "E", "F", "G")         
pareto.chart(defect, ylab = "Frequency of errors", main =" ", 
                  xlab="Cause of error", las=3, col="grey")


#########################################################################

                                                            # Section 3.6

gini <- function(x, y) {                   # function GINI coefficient  
    area <- 0                              # trapezoidal rule              
    for (i in 2:(n+1)) area <- area + 0.5*((x[i]-x[i-1])*(y[i]+y[i-1]))
    gini <- 1 - 2*area; round(gini, 3)     # Gini coefficient                          
}
x <- c(2, 8, 10, 15, 20, 45);  n <- length(x)
u <- c(0, (1:n)/n)                 # absissa - relative index          
v <- c(0, (cumsum(x) / sum(x)))    # ordinate  - cumulative rel. shares 
gini(u, v)
                                                            # figure 3.14
														 
                                   # Lorenz curve und Gini coefficient
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="n", ps=14)
x <- c(2, 8, 10, 15, 20, 45);  n <- length(x)
u <- c(0, (1:n)/n)                 # absissa - relative index          
v <- c(0, (cumsum(x) / sum(x)))    # ordinate  - cumulative rel. shares  
plot(u, v, type="b", cex=1.0, xlim=c(0,1), ylim=c(0,1), las=1, 
     xlab="Companies shares (u)", 	ylab="Market shares (v)")
abline(0,1, col="red", lty=2)
text(0.8, 0.20, paste("Gini coeffizient =",round(gini(u, v),3),"(G=2F)"))
text(0.35, 0.6, "Area of concentration F")
lines(c(0.4,0.6),c(0.55,0.4), lwd=2)
lines(c(0,0.5,0.5), c(0.2,0.2,0), lty=2)
polygon(c(u,0), c(v,0), angle = -45, border=NA, density = 10)

#########################################################################

                                                
gini_num <- function(x, unbiased =FALSE) {     # numeric Gini coefficient 
N    <- length(x); mu   <- mean(x)
n    <- if (unbiased) N * (N-1) else N * N
ox   <- x[order(x)]
dsum <- drop(crossprod(2 * 1:N - N - 1, ox))
round(dsum / (mu * n), 3)
}
x <- c(2, 8, 10, 15, 20, 45); gini_num(x, unbiased=FALSE)

#########################################################################

                                                         # section 3.7.2


                               # age and height in Kalama - Tabelle 3.10

                                                           # figure 3.17
age    <- seq(18, 29, by=1)
height <- c(76.1, 77.0, 78.1, 78.2, 78.8, 79.7, 79.9, 81.1, 81.2, 
            81.8, 82.8, 83.5)

par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=14, cex.axis=1) 
plot(age, height, pch=16, cex=1.5, las=1,
     xlab="Age [months]", ylab="Height [cm]",
     xlim=c(17, 30), ylim=c(75, 85))
     
#########################################################################

                                                          # Section 3.7.3                       
x <- c(13, 17, 10, 17, 20, 11, 15)
y <- c(12, 17, 11, 13, 16, 14, 15)

var(x); var(y); 
cov(x, y)                                               # covariance 
cor(x, y)                                               # correlation

#########################################################################

                                                          # Section 3.7.5 

                                                        # autocorrelation
x <- c(2.4, 2.4, 2.3, 2.2, 2.1, 1.8, 2.3, 2.3, 2.2, 2.0, 1.9, 1.7, 2.2, 1.8, 3.2,
	   3.2, 2.7, 2.5, 2.2, 1.9, 1.9, 1.8, 2.7, 2.5, 2.3, 2.0, 2.6, 2.4, 1.8, 1.7) 
n <- length(x)
t <- seq(1, n, by=1)
acorr <- acf(x, lag.max=5, plot=FALSE); acorr

                                                        # figure	3.20						 
par(mfrow=c(1,2), lwd=2, font.axis=2, bty="n", ps=14)
plot(t, x, ylim=c(1.5,3.5), xlim=c(0,n), las=1)
acorr <- acf(x, lag.max=5, plot=FALSE)
plot(acorr, main=" ",las=1,ylab="Autocorrelation", xlab="Distance (lag)")


#########################################################################

                                                          # Setion 3.7.6

                                             # reliability - data fictive
test <- read.csv("reliability.csv", header = TRUE, sep = ";")
attach(test); test[1:3,]
k <- 6                                        # number of items     
cor(score, r_score)                           # test-retest reliability
                                              # split-half               
test1 <- cbind(item1, item2, item3); score1 <- apply(test1, 1, sum)
test2 <- cbind(item4, item5, item6); score2 <- apply(test2, 1, sum)

r_tt  <- cor(score1, score2); r_tt            # reliability            

r_tt1 <- 2*r_tt / (1+r_tt); r_tt1             # Spearman-Brown correction 
                                              # Guttmann coefficient     
r_tt2 <- 2 * (1 - (var(score1) + var(score2))/var(score)); r_tt2

mat <- cbind(item1, item2, item3, item4, item5, item6)
si  <- sum(diag(cov(mat)))                    # item variances          
st  <- var(score)                             # total variance in score  
         
alpha <- k/(k-1) * (1 - si/st); alpha         # Cronbach Alpha  

#########################################################################

library(psych)
alpha(mat)                          # function alpha() in library(psych)

#########################################################################

                                                          # Section 3.7.7

                                                      # rank correlation
L <- c(1, 2, 2, 2, 3, 3, 4, 4); M <- c(2, 4, 1, 3, 4, 3, 4, 3)
r.L <- rank(L, ties.method="average"); r.L            # rank numbers to x
r.M <- rank(M, ties.method="average"); r.M            # rank numbers to y

D <- r.L - r.M; n <- length(D)
1- 6*sum(D^2)/(n*(n^2-1))        # rank correlation coefficient (Spearman)
cor(r.L, r.M)                    # correlation coefficient from rank numbers
cor(L, M, method="spearman")     # function cor() with method...
#########################################################################

                                                          # Section 3.7.9

                                                      # linear regression

                                                   # exposure to asbestos
asbestos <- c(50, 400, 500, 900, 1100, 1600, 1800, 2000, 3000)
lungca   <- c(2, 6, 5, 10, 26, 42, 37, 28, 50)
lm(lungca ~ asbestos)
                                                     
#########################################################################
reg <- lm(lungca ~ asbestos); reg$coefficients
lungca.hat <- reg$coefficients[2] + reg$coefficients[2]*asbestos
resid      <- lungca - lungca.hat
                                                            # figure 3.21
par(mfrow=c(1,2), lwd=1.5, font.axis=2, bty="l", ps=14, cex.axis=1) 
plot(asbestos, lungca, pch=16, cex=1.5, las=1, 
     ylab="Development of lung tumor [%]", 
     xlab="Exposures to asbestos [fibers/ml]", 
     ylim=c(0, 50), xlim=c(0,3000))
abline(reg$coefficients[1], reg$coefficients[2], col="red", lwd=2)
plot(asbestos, resid, cex=1.5, las=1, xlim=c(0, 3000), pch=16, ylim=c(-10, ++10),
     xlab="Exposures to asbestos [fibers/ml]", ylab="Residuals")
abline(h=0, col="red", lwd=2)

#########################################################################

                                                      # Section 3.7.10.3

                                                
x <- c(13, 17, 10, 17, 20, 11, 15)                    # data fictive
y <- c(12, 17, 11, 13, 16, 14, 15)
                                                 # orthogonal regression
Q.x  <- sum((x - mean(x))^2); Q.y  <- sum((y - mean(y))^2)
Q.xy <- sum((x - mean(x))*(y - mean(y)))

b <- (-(Q.x-Q.y)+sqrt((Q.x-Q.y)^2+4*Q.xy^2)) / (2 * Q.xy); b
a <- mean(y) - b*mean(x); a

#########################################################################

                                                            # figure 3.23
r1 <- lm(y ~ x); a1 <- r1$coeff[1]; b1 <- r1$coeff[2]
r2 <- lm(x ~ y); a2 <- r2$coeff[1]; b2 <- r2$coeff[2]

par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", 
    cex.lab=1.4, ps=12, cex.axis=1) 
plot(x, y, pch=16, cex=1.4, ylab="Y", xlab="X", las=1,
      ylim=c(10, 20), xlim=c(10, 20))
abline(a, b, col="red", lwd=2)
abline(a1, b1, lty=2)
abline(-a2/b2, 1/b2, lty=2)

#########################################################################

                                                         # Section 3.7.11

                                               # robust linear regression 
library(quantreg)
library(MASS)
                            #  international calls in Belgium 1950 - 1973
year   <- seq(1950, 1973, by=1)
number <- c(0.44,0.47,0.47,0.59,0.66,0.73,0.81,0.88,1.06,1.2,1.35,1.49,1.61,
             2.12,11.90,12.40,14.20,15.90,18.20,21.20,4.30,2.40,2.70,2.90)
OLS.regr <- lm(number ~ year); OLS.regr 
LAD.regr <- rq(number ~ year, tau=0.5); LAD.regr
HUBER.regr <- rlm(number ~ year, psi = psi.huber, k2 = 1.345); HUBER.regr


                                                            # figure 3.24
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=12, 
    cex.lab=1.4, cex.axis=1.2) 
plot(year, number, pch=16, cex=1.3, ylab="Number", las=1,
       xlab="Year", ylim=c(0, 22), xlim=c(1950,1975))
abline(OLS.regr$coefficients[1], 
       OLS.regr$coefficients[2],   col="red", lwd=2, lty=1)
text(1970, 11, "OLS", cex=1.3)
abline(LAD.regr$coefficients[1], 
       LAD.regr$coefficients[2],   col="blue", lwd=2, lty=2)
text(1968, 1.2, "LAD", cex=1.3)
abline(HUBER.regr$coefficients[1], 
       HUBER.regr$coefficients[2], col="blue", lwd=2, lty=3)
text(1971, 5.6, "HUBER", cex=1.3)

#########################################################################

                                                         # Section 3.7.12

                                                   # nonlinear regression

                                                            # figure 3.25
par(mfrow=c(1,3), lwd=2, font.axis=2, bty="l", ps=14, 
    cex.axis=1.2, cex.lab=1.4) 
x <- seq(0, 15, by=0.1)
y1 <-  1 + 1*x + 0.05*x^2; y2 <-  1 + 1*x - 0.05*x^2
y3 <- 10 - 1*x + 0.05*x^2; y4 <- 10 - 1*x - 0.05*x^2
plot(x, y1, type ="l", main="quadratic", xlab=" ", las=1,
      ylab=" ", ylim=c(0,15))
lines(x, y2); lines(x, y3); lines(x, y4)
text(12.1, 12, expression(y[1] == 1 + 1*x + 0.05*x^2), cex=1.3)
text(12,  7, expression(y[2] == 1 + 1*x - 0.05*x^2), cex=1.3)
text( 9.1, 4.5, expression(y[3] == 10 - 1*x + 0.05*x^2), cex=1.3)
text(11.2,  1, expression(y[4] == 10 - 1*x - 0.05*x^2), cex=1.3)

x <- seq(0.1, 12, by=0.1); y1 <- 1  + 10/x; y2 <- 20 - 10/x
plot(x, y1, type="l", main="hyperbolic", xlab=" ", las=1,
      ylab=" ", ylim=c(0,20))
lines(x, y2)
text( 7, 4, expression(y[1] == 1 + frac(10,x)), cex=1.3)
text( 5,16, expression(y[2] == 20 - frac(10,x)), cex=1.3)

x <- seq(0.1, 12, by=0.1); e <- exp(1)
y1 <- 0.1 * e^(0.5*x); y2 <-  10 * e^(-0.5*x)
plot(x, y1, type="l", main="exponential", xlab=" ", las=1,
      ylab=" ", ylim=c(0,20))
lines(x, y2)
text(7.7, 15, expression(y[1] == 0.1 * plain(e)^{0.5*x}), cex=1.3)
text(3,  8, expression(y[2] ==  10 * plain(e)^{-0.5*x}), cex=1.3)

#########################################################################

                                                   # nonlinear regression 
x <- c(1, 2, 3, 4, 5)
y <- c(4, 1, 3, 5, 6)
nls( y ~ a + b*x + c*x^2, start = list(a = 1, b = 1, c = 1))

                                               # quality of fit
mod    <- nls( y ~ a + b*x + c*x^2, start = list(a = 1, b = 1, c = 1))
formula(mod); coef(mod)
new <- data.frame(x = c(1,2,3,4,5))
predict(mod, new)

#########################################################################
                                                
a <- coef(mod) [1]; b <- coef(mod) [2]; c <- coef(mod) [3]
y.hat  <- predict(mod, new)
x.line <- seq(1, 5, by=0.1)
y.line <- a + b*x.line + c*x.line^2
                                                            # figure 3.26
par(mfrow=c(1,1), lwd=1.5, font.axis=2, bty="l", ps=14, 
    cex.lab=1.4, cex.axis=1.2) 
plot(x, y, xlab="x - value", ylab="y - value", las=1,
     ylim=c(1,7), cex=2, pch=16)
points(x, y.hat, cex=2, pch=18, col="red")
lines(x.line, y.line, lty=2, lwd=2, col="red")

#########################################################################

x <- seq(0, 100,by=.5)                            # asymptotic regression
a <- 10; b <- 2 ; c <- -2.8
t0.5 <- log(2)/exp(c); t0.5
y.asymp <- a + (b-a)*exp(-exp(c)*x)
                                                            # figure 3.27
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
    cex.lab=1.5, cex.axis=1.2)
plot(x, y.asymp, type="l", las=1, col="red",
     ylim=c(0,12), xlab=" ", ylab="y")
abline(h=a, lty=2); abline(h=b, lty=2)
y1 <- a + (b-a)*exp(-exp(c)*t0.5)
lines(c(t0.5,t0.5), c(0,y1), lty=3)
lines(c(0, t0.5), c(y1, y1), lty=3)
text(50, a+0.5, paste("a = ",a), cex=1.3)
text(50, b+0.5, paste("b = ",b), cex=1.3)
text(45, b+(a-b)/2, paste("c = ",c," (t = ",round(t0.5,2),")"), cex=1.3)

#########################################################################

x <- seq(0, 100,by=.5)                 # asymtotic regression with offset
a <- 10; b <- -2.8; c <- 15
t0.5 <- log(2)/exp(b); t0.5
y.asympoff <- a*(1-exp(-exp(b)*(x-c)))

par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
    cex.lab=1.5, cex.axis=1.2)
plot(x, y.asympoff, type="l", ylim=c(0,12), xlab=" ", las=1, col="red",
     ylab="y (asymptotic regression with offset)")
abline(h=a, lty=2)
y1 <- a*(1-exp(-exp(b)*(x-c)))
lines(c(c, c), c(0, a/2+1), lty=3)
lines(c(c, c+t0.5), c(a/2, a/2), lty=3)
text(50, a+0.5, paste("a = ", a), cex=1.3)
text(c, a/2+1.5, paste("c = ",c), cex=1.3)
text(50, a/2, paste("b = ",b," (t = ",round(t0.5,2),")"))
                                                     
#########################################################################

x <- seq(0, 100,by=.5)                             # logistic regression
a <- 10; b <- 50;  c <- 10 
y.logis <- a / (1 + exp((b-x)/c))
                                                            # figure 3.28
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
    cex.lab=1.5, cex.axis=1.2)
plot(x, y.logis, type="l", ylim=c(0,12), xlab=" ", ylab="y", las=1, col="red")
abline(h=a, lty=2)
y1 <- a - 2
lines(c(b, b), c(0, y1), lty=2)
y2 <- 0.73 * a; y2
x2 <- b - c * log(1/0.73 - 1); x2
lines(c(b, x2), c(y2, y2), lty=2)
text(80, a+0.5, paste("a = ",a), cex=1.3)
text(50, y1 + 0.5, paste("b = ",b), cex=1.3)
text(75, y2, paste("c = ",c), cex=1.3)
                                                
#########################################################################

x <- seq(0, 10, by=.05)                              # compartment models
dose <- 1.5; elim <- 0.5; absorp <- 2.0; clear <- 0.2
a <- exp(elim); a
b <- exp(absorp); b
c <- exp(clear); c
y.fol <- (dose * elim * absorp / clear * (absorp-elim)) * 
  (exp(-elim*x)-exp(-absorp*x))
                                                            # figure 3.29
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
    cex.lab=1.5, cex.axis=1.2)
plot(x, y.fol, type="l", ylim=c(0,6), xlab=" ", ylab="y", las=1, col="red")

#########################################################################

                                                 # Michaelis-Menton model 
conc <- c(0.02, 0.02, 0.06, 0.06, 0.11, 0.11, 0.22, 
          0.22, 0.56, 0.56, 1.10, 1.10)
rate <- c(76,  47,  97, 107, 123, 139, 159, 152, 191, 201, 207, 200) 
nls(rate ~ SSmicmen(conc, Vm, K))
                                                           # figure 3.30
par(mfrow=c(1,1), lwd=2, font.axis=2, bty="l", ps=14,
    cex.lab=1.1, cex.axis=1.2)
fmod <- nls(rate ~ SSmicmen(conc, Vm, K))
plot(conc, rate, xlab="Concentration [ppm]", las=1, 
      ylab=expression(Counts / min^2),
     xlim=c(0,1.2), ylim=c(40,220), cex=1.5, pch=16)   
lines(conc, predict(fmod, list(conc = conc)), lwd=2, col="red")

#########################################################################

                                                # normal equation example
x <- c(1, 2, 3, 4, 5)
y <- c(3, 7,12,26,51)
nls( y ~ a*b^x, start = list(a = 1, b = 1))

#########################################################################

                                                          #section 3.7.12

                                            # linearizing transformations 

                                                            # figure 3.32
par(mfrow=c(4,2), lwd=2, font.axis=2, bty="l", ps=14,
    cex.lab=1.1, cex.axis=1.2)
x <- seq(0, 10, by=0.1)    
a <- 2;  b <- 0.2
y1 <- a * exp(b*x)                           # exponentialfunction         
plot(x, y1, type="b", ylim=c(0, 10), cex = 0.5, 
     ylab="y", main="Exponentialfunction: A ")
abline(h=2, lty=3)
text(0.1, 3, "a")
text(3.5, 6, "a>0, b>0")
a <- 2;  b <- -0.4 
y1 <- a * exp(b*x) 
lines(x, y1, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(5, 1, "a>0, b<0")

a <- 0.1; b <- 2.5
y2 <- a * x^b                               # power function             
plot(x, y2, type="b", ylim=c(0, 10), cex =0.5,
     ylab="y", main="Powerfunction")
abline(a, 1, lty=3)
text(4.5, 9, "a>0, b>1")
text(9, 8, "b=1")
a <- 2; b <- 0.5
y2 <- a * x^b                           
lines(x, y2, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(7, 4, "a>0, b<1")

a <- 8; b <- 0.5
y3 <- a*x / (b + x)                         # hyperbola           
plot(x, y3, type="b", ylim=c(0, 10), cex =0.5,
     ylab="y", main="Hyperbola")
abline(h=8, lty=3)
text(0.1, 8.8, "a")
text(3, 5.5, "a>0, b>0")

a <- 8; b <- -0.7
y4 <- (1 - exp(b*x))*a                      # exponent. saturation        
plot(x, y4, type="b", ylim=c(0, 10), cex =0.5,
     ylab="y", main="Exponential saturation")
abline(h=8, lty=3)
text(0.1, 8.8, "a")
text(3, 5.5, "a>0, b<0")

a <- 8; b <- -0.7
y5 <- a / (1 + 10*exp(b*x))                 # exponent. sigmoid          
plot(x, y5, type="b", ylim=c(0, 10), cex =0.5,
     ylab="y", main="Expontial sigmoid")
abline(h=8, lty=3)
text(0.1, 8.8, "a")
text(3, 1, "a>0, b<0, c>0")

a <- 4; b <- -0.8
y6 <- a / (b + x)                           # modified inverse       
plot(x, y6, type="b", ylim=c(0, 10), cex =0.5,
     ylab="y", main="Modified inverse")
abline(h=a/b, lty=3)
abline(v=-b, lty=3)
text(0.3, 9, "x = -b")
text(2.8, 9, "a>0, b<0")
a <- 4; b <- 0.7
y6 <- a / (b + x)   
lines(x, y6, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(2.5, 0.5, "a>0, b>0")

a <- 5; b <- -0.5
y7 <- a * exp(b/x)                         # modified exponential
plot(x, y7, type="b", ylim=c(0, 10), cex =0.5,
     ylab="y", main="Exponentialfunction: B")
abline(h=5, lty=3)
text(0.3,5.8,"a")
text(2.5, 3, "a>0, b<0")
a <- 5; b <- 0.3
y7 <- a * exp(b/x)  
lines(x, y7, type="b", ylim=c(0, 10), cex =0.5, lty = 2)
text(2, 8, "a>0, b>0")

a <- 10; b <- -0.4
y8 <- a * x * exp(b*x)                     # Maximafunction            
plot(x, y8, type="b", ylim=c(0, 10), cex =0.5,
     ylab="y", main="Maximafunction")
text(5, 4, "a>0, b<0")

#########################################################################

                                                            # Section 3.8

                                               # nonparametric regression

# x <- seq(0, 15, by=.1)                       # fictive data .....
# n <- length(x); x <- x + rnorm(n)
# y <- 2*x/(0.5*x^2+10) + 1; y <- y + rnorm(n)/15
# plot(x,y)

x <- c( 0.29,  0.16,  2.46,  1.27,  1.57,  2.22,  1.60,  1.15,  1.21,  
        3.44,  0.05,  0.64,  2.55,  0.36,  0.31,  0.44,  2.08,  2.52,  
        1.55,  1.37,  3.03,  0.85,  0.58,  3.56,  4.35,  2.30,  1.02,  
        2.90,  3.65,  2.48,  1.13,  1.83,  2.02,  3.83,  3.39,  4.51,  
        4.72,  3.28,  5.10,  3.94,  5.54,  4.16,  3.43,  4.82,  6.42,  
        3.43,  6.59,  3.88,  5.48,  5.10,  4.42,  4.91,  5.24,  6.61,  
        6.62,  5.47,  7.31,  5.84,  4.49,  6.91,  6.50,  6.86,  5.86,  
        5.29,  6.27,  4.03,  7.48,  7.85,  7.77,  7.59,  6.82,  6.73,  
        5.78,  7.77,  6.31,  6.72,  7.66,  6.85,  8.18,  7.37,  8.65, 
        8.02,  7.14,  6.88,  9.57,  7.48,  9.53,  8.04,  7.96, 10.30, 
        10.10,  8.36,  8.95,  8.09, 10.38, 10.31,  9.23, 12.07, 10.37,  
        9.72,  9.11,  9.40, 10.31,  9.93,  8.99, 11.60, 12.66,  9.61, 
        10.97,  9.89, 10.82, 11.05, 11.33, 11.99, 12.33, 10.61, 11.97, 
        10.87, 13.27, 11.01, 13.12, 10.22,  9.28, 13.06, 12.75, 12.32, 
        11.74, 12.90, 11.23, 12.91, 13.95, 13.17, 13.99, 14.04, 12.65, 
        12.67, 11.48, 14.35, 14.95, 14.25, 12.89, 16.18, 15.25, 14.66, 
        14.13, 14.81, 15.88, 15.89, 14.88, 15.63, 13.73)

y <- c(1.07, 1.08, 1.41, 1.13, 1.18, 1.47, 1.34, 1.16, 1.20, 1.33, 1.03, 
       1.09, 1.35, 1.05, 1.02, 1.06, 1.38, 1.39, 1.25, 1.21, 1.49, 1.07, 
       1.19, 1.44, 1.35, 1.39, 1.23, 1.33, 1.40, 1.42, 1.14, 1.35, 1.35, 
       1.38, 1.35, 1.47, 1.38, 1.26, 1.43, 1.50, 1.42, 1.51, 1.41, 1.40, 
       1.42, 1.40, 1.38, 1.43, 1.53, 1.35, 1.50, 1.32, 1.43, 1.47, 1.42, 
       1.51, 1.33, 1.36, 1.37, 1.33, 1.41, 1.45, 1.52, 1.48, 1.43, 1.50, 
       1.32, 1.26, 1.34, 1.32, 1.42, 1.49, 1.45, 1.48, 1.46, 1.51, 1.52, 
       1.38, 1.42, 1.49, 1.36, 1.38, 1.40, 1.50, 1.42, 1.36, 1.31, 1.38, 
       1.28, 1.24, 1.32, 1.42, 1.32, 1.43, 1.34, 1.31, 1.34, 1.20, 1.36, 
       1.34, 1.44, 1.29, 1.44, 1.28, 1.39, 1.24, 1.23, 1.41, 1.36, 1.29, 
       1.34, 1.34, 1.25, 1.35, 1.20, 1.30, 1.25, 1.38, 1.22, 1.32, 1.24, 
       1.24, 1.44, 1.22, 1.27, 1.24, 1.24, 1.19, 1.23, 1.30, 1.19, 1.31, 
       1.21, 1.25, 1.22, 1.33, 1.24, 1.22, 1.23, 1.39, 1.24, 1.23, 1.20, 
       1.21, 1.27, 1.15, 1.26, 1.11, 1.15, 1.23, 1.27)

par(mfcol=c(1,1), lwd=2, font.axis=2, bty="l", ps=18) 
plot(x,y)

#########################################################################

                                                          
regramm <- function(x, y, h) {                   # regressogramm function
  cuts <- seq(min(x), max(x)+h, by=h)
  bins <- cut(x, cuts)
  yval <- aggregate(y~bins, FUN=mean)
  return(stepfun(x=cuts[c(-1,-length(cuts))], y=yval$y))
}
                                                            # figure 3.33
par(mfcol=c(1,3), lwd=2, font.axis=2, bty="l", ps=18)  
plot(x, y, main="Regressogramm", las=1)                   # regressogramm 
lines(regramm(x, y, h=0.5), col="blue", lwd=2)
text(15,1.5,"(A)", cex=1.5)

plot(x, y, main="Moving average", las=1)                 # moving average
lines(ksmooth(x, y, kernel="box", bandwidth=1),  col="blue", lwd=2)
text(15,1.5,"(B)", cex=1.5)

plot(x, y, main="Kernel estimation", las=1)           # kernel estimation
lines(ksmooth(x, y, kernel="normal", bandwidth=2),  col="blue", lwd=2)
text(15,1.5,"(C)", cex=1.5)

#########################################################################

                                                          # Section 3.8.2

                                             # cubic spline interpolation
							   
x1 <- 1:7;   xs1 <- seq(1.0,   7.5, by=0.1)  # generate fictive data
x2 <- 8:14;  xs2 <- seq(7.5,  14.5, by=0.1)
x3 <- 15:21; xs3 <- seq(14.5, 21.0, by=0.1)
x  <- c(x1, x2, x3); xs <- seq(1,21, by=0.1)
y1 <- c(5,6.5,7,8,7,5,4); 
y2 <- c(3,2.5,1,1,3,2,5); 
y3 <- c(6,8,10,9,8,7,7.5)
y  <- c(y1, y2, y3)                       # 'truncate' 3rd degree polynom 
                                        
tpow <- function(x, t) (x - t) ^ 3 * (x > t)

                                                            # figure 3.34
par(mfrow=c(1,3), lwd=2, font.axis=2, bty="l", ps=18)										
plot(x, y, main="(A) Cubic spline interpolation") 	 # splines
lines(spline(x, y, xout=seq(1,21, by=0.1)))		

plot(x, y, main="(B) Polynom. regression in section") 	    # regression
abline(v=c(7.5, 14.5), lty=2)   		
s1 <- lm(y1 ~ x1 + I(x1^2) + I(x1^3))					              # 1. section
lines(xs1, predict(s1, data.frame(x1=xs1)))
s2 <- lm(y2 ~ x2 + I(x2^2) + I(x2^3))				                # 2. section
lines(xs2, predict(s2, data.frame(x2=xs2)))
s3 <- lm(y3 ~ x3 + I(x3^2) + I(x3^3))				                # 3. section
lines(xs3, predict(s3, data.frame(x3=xs3)))
symbols(7.5, 3.2, circles = 0.2, inches=0.25, add=TRUE, 
        lty=3, fg="darkgrey")
symbols(14.5, 4.6, circles = 0.2, inches=0.45, add=TRUE, 
        lty=3, fg="darkgrey")

plot(x, y, main="(C) Spline fitting (cubic)")    # Spline fitting (cubic)
fm <- lm(y ~ x + I(x^2) + I(x^3) + I(tpow(x,7.5)) + I(tpow(x,14.5)))
xs <- seq(1, 21, by=0.1)
lines(xs, predict(fm, data.frame(x=xs)), col="red")
abline(v=c(7.5, 14.5), lty=2)

#########################################################################
